#!/usr/bin/env bash
# db-create_calibration_view -- Creates a variable's *_calibration view in a PostgreSQL database
# > eval "$(db-parse "$url")"
# > export DEEPDIVE_CALIBRATION_NUM_BUCKETS=10
# > db-create_calibration_view TABLE
##
set -euo pipefail

table=${1:?The TABLE holding the variable must be given, e.g., has_spouse}

: ${DEEPDIVE_CALIBRATION_NUM_BUCKETS:=10}
num_buckets=$DEEPDIVE_CALIBRATION_NUM_BUCKETS

db-create-view-as "${table}_calibration" "
    WITH bucketed AS (
        SELECT dd_label AS label
             , CASE
                 WHEN expectation = 1 THEN $(($num_buckets - 1))
                 ELSE FLOOR(expectation * ${num_buckets})
               END AS bucket
        FROM ${table}_inference
    )
    SELECT universe.bucket                             AS bucket
         , universe.count                              AS num_variables
         , COALESCE(positive.count, 0)                 AS num_correct
         , COALESCE(negative.count, 0)                 AS num_incorrect
         , (universe.bucket     ) / $num_buckets.      AS probability_lo
         , (universe.bucket +  1) / ${num_buckets}.    AS probability_hi
         , (universe.bucket + .5) / ${num_buckets}.    AS probability
         , CASE WHEN COALESCE(positive.count, 0) +
                     COALESCE(negative.count, 0) <> 0 THEN
                COALESCE(positive.count, 0)::FLOAT / (
                         COALESCE(positive.count, 0) +
                         COALESCE(negative.count, 0) )
           ELSE NULL END                               AS accuracy
         , COALESCE(positive.count, 0) +
           COALESCE(negative.count, 0)                 AS num_predictions_test
         , universe.count                              AS num_predictions_whole
      FROM (
            SELECT bucket, COUNT(*) AS count
              FROM bucketed
             GROUP BY bucket
           ) universe
      LEFT JOIN (
            SELECT bucket, COUNT(*) AS count
              FROM bucketed
             WHERE label = true
             GROUP BY bucket
           ) positive ON universe.bucket = positive.bucket
      LEFT JOIN (
            SELECT bucket, COUNT(*) AS count
              FROM bucketed
             WHERE label = false
             GROUP BY bucket
           ) negative ON universe.bucket = negative.bucket
     ORDER BY universe.bucket
"
